package cn.udrm.elasticsearch.fileread;

import org.apache.commons.lang.StringUtils;
import org.apache.poi.hssf.usermodel.HSSFDateUtil;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;

import java.io.File;
import java.io.InputStream;
import java.math.BigDecimal;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.Iterator;
import java.util.List;

/**
 * Excel超大数据读取，抽象Excel2007读取器，excel2007的底层数据结构是xml文件，采用SAX的事件驱动的方法解析 
 * xml，需要继承DefaultHandler，在遇到文件内容时，事件会触发，这种做法可以大大降低 内存的耗费，特别使用于大数据量的文件。 
 * @version 2014-9-2 
 */  
public abstract class ExcelReader extends DefaultHandler {
  
    // 共享字符串表  
    private SharedStringsTable sst;
      
    // 上一次的内容  
    private String lastContents;  
    private boolean nextIsString;  
  
    private int sheetIndex = -1;  
    private List<String> rowList = new ArrayList<String>();
  
    // 当前行  
    private int curRow = 0;  
    // 当前列  
    private int curCol = 0;  
    // 日期标志  
    private boolean dateFlag;  
    // 数字标志  
    private boolean numberFlag;  
  
    private boolean isTElement;  
  
    /** 
     * 遍历工作簿中所有的电子表格 
     * @param filename 
     * @throws Exception 
     */  
    public void process(String filename) throws Exception {  
        OPCPackage pkg = OPCPackage.open(filename);
        XSSFReader r = new XSSFReader(pkg);
        SharedStringsTable sst = r.getSharedStringsTable();  
        XMLReader parser = fetchSheetParser(sst);
        Iterator<InputStream> sheets = r.getSheetsData();
        while (sheets.hasNext()) {  
            curRow = 0;  
            sheetIndex++;  
            InputStream sheet = sheets.next();  
            InputSource sheetSource = new InputSource(sheet);
            parser.parse(sheetSource);  
            sheet.close();  
        }  
    }  
  
    /** 
     * 只遍历一个电子表格，其中sheetId为要遍历的sheet索引，从1开始，1-3 
     * @param filename 
     * @param sheetId 
     * @throws Exception 
     */  
    public void process(String filename, int sheetId) throws Exception {  
        OPCPackage pkg = OPCPackage.open(filename);  
        XSSFReader r = new XSSFReader(pkg);  
        SharedStringsTable sst = r.getSharedStringsTable();  
        XMLReader parser = fetchSheetParser(sst);  
        // 根据 rId# 或 rSheet# 查找sheet  
        InputStream sheet2 = r.getSheet("rId" + sheetId);  
        sheetIndex++;  
        InputSource sheetSource = new InputSource(sheet2);  
        parser.parse(sheetSource);  
        sheet2.close();  
    }  
  
    public XMLReader fetchSheetParser(SharedStringsTable sst)  
            throws SAXException {
        XMLReader parser = XMLReaderFactory.createXMLReader("org.apache.xerces.parsers.SAXParser");
        this.sst = sst;  
        parser.setContentHandler(this);  
        return parser;  
    }  
  
    public void startElement(String uri, String localName, String name,  
            Attributes attributes) throws SAXException {
  
//      System.out.println("startElement: " + localName + ", " + name + ", " + attributes);  
  
        // c => 单元格  
        if ("c".equals(name)) {  
            // 如果下一个元素是 SST 的索引，则将nextIsString标记为true  
            String cellType = attributes.getValue("t");  
            if ("s".equals(cellType)) {  
                nextIsString = true;  
            } else {  
                nextIsString = false;  
            }  
            // 日期格式  
            String cellDateType = attributes.getValue("s");  
            if ("1".equals(cellDateType)) {  
                dateFlag = true;  
            } else {  
                dateFlag = false;  
            }  
            String cellNumberType = attributes.getValue("s");  
            if ("2".equals(cellNumberType)) {  
                numberFlag = true;  
            } else {  
                numberFlag = false;  
            }  
  
        }  
        // 当元素为t时  
        if ("t".equals(name)) {  
            isTElement = true;  
        } else {  
            isTElement = false;  
        }  
  
        // 置空  
        lastContents = "";  
    }  
  
    public void endElement(String uri, String localName, String name)  
            throws SAXException {  
  
//      System.out.println("endElement: " + localName + ", " + name);  
  
        // 根据SST的索引值的到单元格的真正要存储的字符串  
        // 这时characters()方法可能会被调用多次  
        if (nextIsString) {  
            try {  
                int idx = Integer.parseInt(lastContents);  
                lastContents = new XSSFRichTextString(sst.getEntryAt(idx))
                        .toString();  
            } catch (Exception e) {  
  
            }  
        }  
        // t元素也包含字符串  
        if (isTElement) {  
            String value = lastContents.trim();  
            rowList.add(curCol, value);  
            curCol++;  
            isTElement = false;  
            // v => 单元格的值，如果单元格是字符串则v标签的值为该字符串在SST中的索引  
            // 将单元格内容加入rowlist中，在这之前先去掉字符串前后的空白符  
        } else if ("v".equals(name)) {  
            String value = lastContents.trim();  
            value = value.equals("") ? " " : value;  
            try {  
                // 日期格式处理  
                if (dateFlag) {  
                    Date date = HSSFDateUtil.getJavaDate(Double.valueOf(value));
                    SimpleDateFormat dateFormat = new SimpleDateFormat("dd/MM/yyyy");
                    value = dateFormat.format(date);  
                }  
                // 数字类型处理  
                if (numberFlag) {  
                    BigDecimal bd = new BigDecimal(value);
                    value = bd.setScale(3, BigDecimal.ROUND_UP).toString();  
                }  
            } catch (Exception e) {  
                // 转换失败仍用读出来的值  
            }  
            rowList.add(curCol, value);  
            curCol++;  
        } else {  
            // 如果标签名称为 row ，这说明已到行尾，调用 optRows() 方法  
            if (name.equals("row")) {  
                getRows(sheetIndex + 1, curRow, rowList);  
                rowList.clear();  
                curRow++;  
                curCol = 0;  
            }  
        }  
  
    }  
  
    public void characters(char[] ch, int start, int length)  
            throws SAXException {  
        // 得到单元格内容的值  
        lastContents += new String(ch, start, length);  
    }  
  
    /** 
     * 获取行数据回调 
     * @param sheetIndex 
     * @param curRow 
     * @param rowList 
     */  
    public abstract void getRows(int sheetIndex, int curRow, List<String> rowList);  
  
    /** 
     * 测试方法 
     */  
    public static void main(String[] args) throws Exception {
        long l=System.currentTimeMillis();

        File file=new File("C:\\Users\\xurongbei\\Desktop\\xrb\\_MU3UcAH9Eeu2jveyhCtLEA.xlsx");;
  
ExcelReader reader = new ExcelReader() {
    public void getRows(int sheetIndex, int curRow, List<String> rowList) {
          if(rowList.size()>0){
              System.out.println("Sheet:" + sheetIndex + ", Row:" + curRow + ", Data:" + StringUtils.join(rowList,","));
          }

    }  
};  
//reader.process(file, 1);
        reader.process(file.getPath());
        System.out.println("==========耗时："+(System.currentTimeMillis()-l));
  
    }  
  }